/* Extracting data for two-way fixed effect estimation */

#delimit ;

clear all;

local outfile "GWgap_2wFE_extract";
set more off;

di _n "$S_DATE $S_TIME";







*******************************************************************************;
* Extracting raw table from SQL;
*******************************************************************************;

/* An observation is a person-pent combination in a financial (April to March) year.
Age is FTE-weighted across the year, includes all ages.
Wage across the year is summed first, then deflated to real using CPI. To convert 
to per FTE basis, total is divided by sum of FTEs.
Uses financial years (that end in) 2001 to 2018.
*/



global IDI_DB idi_clean_20181020;
global IDI_CONN conn("Driver={SQL Server}; Trusted_Connection=YES; Server=WPRDSQL36.stats.govt.nz,49530;Database=$IDI_DB");

odbc query, $IDI_CONN;
odbc describe "ird_ems", $IDI_CONN;


odbc load, bigint clear $IDI_CONN
	exec("
		SELECT DISTINCT FTE.snz_uid,
			FTE.pent,
			fin_yr = CASE WHEN FTE.dim_month_key - 100*FLOOR(FTE.dim_month_key/100) <= 3 
					THEN FLOOR(FTE.dim_month_key/100)
				WHEN FTE.dim_month_key - 100*FLOOR(FTE.dim_month_key/100) >= 4 
					THEN FLOOR(FTE.dim_month_key/100) + 1 
					ELSE NULL END,
			nom_wage = SUM(FTE.gross_earn),
			sum_fte = SUM(FTE.fte),
			female = CASE WHEN FTE.sex_code='M' THEN 0
				WHEN FTE.sex_code='F' THEN 1
				ELSE NULL END,
			mean_age = SUM(FTE.fte*FTE.age)/SUM(FTE.fte)
		FROM IDI_Sandpit.[clean_read_IR].[pent_emp_mth_FTE_IDI_20181020_RFabling] FTE
		WHERE FTE.dim_month_key >= 200004 AND FTE.dim_month_key<=201803
			AND FTE.sex_code IS NOT NULL
			GROUP BY FTE.snz_uid, FTE.pent, 
			CASE WHEN FTE.dim_month_key - 100*FLOOR(FTE.dim_month_key/100) <= 3 
					THEN FLOOR(FTE.dim_month_key/100)
				WHEN FTE.dim_month_key - 100*FLOOR(FTE.dim_month_key/100) >= 4 
					THEN FLOOR(FTE.dim_month_key/100) + 1 
					ELSE NULL END,
				FTE.sex_code");

tostring snz_uid, replace;

egen pent_gp = group(pent);
egen snz_uid_gp = group(snz_uid);

compress;
preserve;

keep if female==0;
drop female;
save `outfile'_pre_2wFE_m, replace;

restore;
keep if female==1;
drop female;
save `outfile'_pre_2wFE_f, replace;



